比较不同组合优化器在不同规模问题上的性能;
下面的结果主要比较 alphamind 和 python 中其他优化器的性能差别,我们将尽可能使用 cvxopt 中的优化器,其次选择 scipy;
由于 scipy 在 ashare_ex 上面性能太差,所以一般忽略 scipy 在这个股票池上的表现;
时间单位都是毫秒。
请在环境变量中设置 DB_URI 指向数据库。
In [1]:
import os
import timeit
import numpy as np
import pandas as pd
import cvxpy
from alphamind.api import *
from alphamind.portfolio.linearbuilder import linear_builder
from alphamind.portfolio.meanvariancebuilder import mean_variance_builder
from alphamind.portfolio.meanvariancebuilder import target_vol_builder
# Render floats in displayed DataFrames with thousands separators and two decimals.
pd.options.display.float_format = '{:,.2f}'.format
In [2]:
# ---------------------------------------------------------------------------
# Benchmark configuration
# ---------------------------------------------------------------------------
ref_date = '2018-02-08'
factor = 'EPS'
risk_model = 'short'

# Universe names and the benchmark code passed for each one, matched by
# position (None is passed as the benchmark for 'ashare_ex').
u_names = ['sh50', 'hs300', 'zz500', 'zz800', 'zz1000', 'ashare_ex']
b_codes = [16, 300, 905, 906, 852, None]

# Per-asset weight bounds used by the linear-programming benchmarks below.
lb = 0.0
ub = 0.1

# ---------------------------------------------------------------------------
# Data loading (requires the DB_URI environment variable; a missing variable
# raises KeyError here).
# ---------------------------------------------------------------------------
data_source = os.environ['DB_URI']
engine = SqlEngine(data_source)

universes = list(map(Universe, u_names))

# One code list per universe as of ref_date.
codes_set = [
    engine.fetch_codes(ref_date, universe=universe)
    for universe in universes
]

# One data bundle per universe; later cells read its 'factor' and 'risk_cov'
# entries.
data_set = [
    engine.fetch_data(ref_date,
                      factor,
                      codes,
                      benchmark=b_code,
                      risk_model=risk_model)
    for codes, b_code in zip(codes_set, b_codes)
]
In [3]:
# Benchmark 1 -- linear programme:
#     maximise er' w   subject to   lb <= w <= ub,   ones' w == 1
# Rows of `df` are solvers, columns are universes, entries are wall-clock
# milliseconds for one solve.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])

number = 1  # repetitions handed to timeit for every measurement

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    er = factor_data[factor].values
    n = len(er)
    lbound = np.ones(n) * lb
    ubound = np.ones(n) * ub

    # A single all-ones constraint column whose two targets are both 1.
    # NOTE(review): presumably linear_builder reads risk_target as
    # (lower, upper) bounds -- confirm against alphamind.
    risk_constraints = np.ones((n, 1))
    risk_target = (np.array([1.]), np.array([1.]))

    # alphamind: solve once to keep the weights, then time an identical call.
    # The timed statement is a string evaluated against globals(), so the
    # names inside it must match the loop variables above.
    status, y, x1 = linear_builder(er, lbound, ubound, risk_constraints, risk_target)
    elapsed_alphamind = timeit.timeit(
        "linear_builder(er, lbound, ubound, risk_constraints, risk_target)",
        number=number, globals=globals()) / number * 1000

    # cvxpy: the same programme. `@` is used for every matrix product; `*`
    # between cvxpy expressions is deprecated for matrix multiplication.
    w = cvxpy.Variable(n)
    curr_risk_exposure = risk_constraints.T @ w
    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0]]
    objective = cvxpy.Minimize(-w.T @ er)
    prob = cvxpy.Problem(objective, constraints)

    # Solve once before timing, so the timed call is a repeat solve of the
    # same Problem object.
    prob.solve(solver='ECOS')
    elapsed_cvxpy = timeit.timeit("prob.solve(solver='ECOS')",
                                  number=number, globals=globals()) / number * 1000

    # Both solvers must reach the same objective value (4 decimals).
    np.testing.assert_almost_equal(x1 @ er, np.array(w.value).flatten() @ er, 4)

    df.loc['alphamind', u_name] = elapsed_alphamind
    df.loc['cvxpy', u_name] = elapsed_cvxpy
    alpha_logger.info(f"{u_name} is finished")
In [4]:
# Display the timing table (milliseconds) filled in by the cell above.
df
Out[4]:
In [5]:
# Objective value of the cvxpy problem left in `prob`, i.e. the one built for
# the last universe iterated in the loop above.
prob.value
Out[5]:
In [6]:
from cvxpy import pnorm

# Benchmark 2 -- linear programme with a turnover limit:
#     maximise er' w   subject to   lb <= w <= ub,   ones' w == 1,
#                                   ||w - current_position||_1 <= turn_over_target
# alphamind is timed with three `method` values; cvxpy uses ECOS.
# Entries of `df` are wall-clock milliseconds for one solve.
df = pd.DataFrame(columns=u_names,
                  index=['cvxpy',
                         'alphamind (clp simplex)',
                         'alphamind (clp interior)',
                         'alphamind (ecos)'])

turn_over_target = 0.5
number = 1  # repetitions handed to timeit for every measurement

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    er = factor_data[factor].values
    n = len(er)
    lbound = np.ones(n) * lb
    ubound = np.ones(n) * ub

    # Start from the 'weight' column when the data has one, otherwise from
    # equal weights.
    if 'weight' in factor_data:
        current_position = factor_data.weight.values
    else:
        current_position = np.ones_like(er) / len(er)

    risk_constraints = np.ones((len(er), 1))
    risk_target = (np.array([1.]), np.array([1.]))

    # For each solver: solve once to keep the weights, then time an identical
    # call. The timed statements are strings evaluated against globals(), so
    # the names inside them must match the loop variables above.

    # --- alphamind, method='interior' ---
    status, y, x1 = linear_builder(er,
                                   lbound,
                                   ubound,
                                   risk_constraints,
                                   risk_target,
                                   turn_over_target=turn_over_target,
                                   current_position=current_position,
                                   method='interior')
    elapsed_interior = timeit.timeit("""linear_builder(er,
                                                       lbound,
                                                       ubound,
                                                       risk_constraints,
                                                       risk_target,
                                                       turn_over_target=turn_over_target,
                                                       current_position=current_position,
                                                       method='interior')""",
                                     number=number, globals=globals()) / number * 1000

    # --- cvxpy / ECOS ---
    # `@` is used for every matrix product; `*` between cvxpy expressions is
    # deprecated for matrix multiplication.
    w = cvxpy.Variable(n)
    curr_risk_exposure = risk_constraints.T @ w
    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0],
                   pnorm(w - current_position, 1) <= turn_over_target]
    objective = cvxpy.Minimize(-w.T @ er)
    prob = cvxpy.Problem(objective, constraints)
    prob.solve(solver='ECOS')
    elapsed_cvxpy = timeit.timeit("prob.solve(solver='ECOS')",
                                  number=number, globals=globals()) / number * 1000

    # --- alphamind, method='simplex' ---
    status, y, x2 = linear_builder(er,
                                   lbound,
                                   ubound,
                                   risk_constraints,
                                   risk_target,
                                   turn_over_target=turn_over_target,
                                   current_position=current_position,
                                   method='simplex')
    elapsed_simplex = timeit.timeit("""linear_builder(er,
                                                      lbound,
                                                      ubound,
                                                      risk_constraints,
                                                      risk_target,
                                                      turn_over_target=turn_over_target,
                                                      current_position=current_position,
                                                      method='simplex')""",
                                    number=number, globals=globals()) / number * 1000

    # --- alphamind, method='ecos' ---
    status, y, x3 = linear_builder(er,
                                   lbound,
                                   ubound,
                                   risk_constraints,
                                   risk_target,
                                   turn_over_target=turn_over_target,
                                   current_position=current_position,
                                   method='ecos')
    elapsed_ecos = timeit.timeit("""linear_builder(er,
                                                   lbound,
                                                   ubound,
                                                   risk_constraints,
                                                   risk_target,
                                                   turn_over_target=turn_over_target,
                                                   current_position=current_position,
                                                   method='ecos')""",
                                 number=number, globals=globals()) / number * 1000

    # Every alphamind method must match the cvxpy objective value (4 decimals).
    cvxpy_weights = np.array(w.value).flatten()
    np.testing.assert_almost_equal(x1 @ er, cvxpy_weights @ er, 4)
    np.testing.assert_almost_equal(x2 @ er, cvxpy_weights @ er, 4)
    np.testing.assert_almost_equal(x3 @ er, cvxpy_weights @ er, 4)

    df.loc['alphamind (clp interior)', u_name] = elapsed_interior
    df.loc['alphamind (clp simplex)', u_name] = elapsed_simplex
    df.loc['alphamind (ecos)', u_name] = elapsed_ecos
    df.loc['cvxpy', u_name] = elapsed_cvxpy
    alpha_logger.info(f"{u_name} is finished")
In [7]:
# Display the timing table (milliseconds) filled in by the cell above.
df
Out[7]:
In [8]:
# Import only the atoms that are used. A wildcard import from cvxpy would
# shadow builtins such as sum/min/max/abs for the rest of the session.
from cvxpy import multiply, quad_form, sum_squares

# Benchmark 3 -- unconstrained mean-variance:
#     minimise  -er' w + 0.5 * w' sec_cov w      (bounds are +/- infinity)
# Entries of `df` are wall-clock milliseconds for one solve.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])

number = 1  # repetitions handed to timeit for every measurement

# Loop-invariant list of risk-factor columns.
all_styles = risk_styles + industry_styles + ['COUNTRY']

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values

    # Dense security covariance (factor part + specific part, both scaled by
    # 1/10000); used only to evaluate the two solutions after solving.
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000

    er = factor_data[factor].values
    n = len(er)
    bm = np.zeros(n)
    lbound = -np.ones(n) * np.inf
    ubound = np.ones(n) * np.inf

    # Factor-structured risk model handed to alphamind (cov=None).
    risk_model = dict(cov=None,
                      factor_cov=risk_cov / 10000.,
                      factor_loading=risk_exposure,
                      idsync=(special_risk ** 2) / 10000.)

    # alphamind: solve once to keep the weights, then time an identical call.
    # The timed statement is a string evaluated against globals(), so the
    # names inside it must match the loop variables above.
    status, y, x1 = mean_variance_builder(er,
                                          risk_model,
                                          bm,
                                          lbound,
                                          ubound,
                                          None,
                                          None,
                                          lam=1)
    elapsed_alphamind = timeit.timeit("""mean_variance_builder(er,
                                                               risk_model,
                                                               bm,
                                                               lbound,
                                                               ubound,
                                                               None,
                                                               None,
                                                               lam=1)""",
                                      number=number, globals=globals()) / number * 1000

    # cvxpy: the same objective in factor form. `@` is used for every matrix
    # product; `*` between cvxpy expressions is deprecated for matrix
    # multiplication.
    w = cvxpy.Variable(n)
    specific_var = sum_squares(multiply(special_risk / 100., w))
    factor_var = quad_form(risk_exposure.T @ w, risk_cov / 10000.)
    risk = specific_var + factor_var
    objective = cvxpy.Minimize(-w.T @ er + 0.5 * risk)
    prob = cvxpy.Problem(objective)

    # Solve once before timing, so the timed call is a repeat solve of the
    # same Problem object.
    prob.solve(solver='ECOS')
    elapsed_cvxpy = timeit.timeit("prob.solve(solver='ECOS')",
                                  number=number, globals=globals()) / number * 1000

    # Compare the utilities of both solutions under the dense covariance.
    u1 = -x1 @ er + 0.5 * x1 @ sec_cov @ x1
    x2 = np.array(w.value).flatten()
    u2 = -x2 @ er + 0.5 * x2 @ sec_cov @ x2
    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elapsed_alphamind
    df.loc['cvxpy', u_name] = elapsed_cvxpy
    alpha_logger.info(f"{u_name} is finished")
In [9]:
# Display the timing table (milliseconds) filled in by the cell above.
df
Out[9]:
In [10]:
# Benchmark 4 -- mean-variance with box bounds:
#     minimise  -er' w + 0.5 * w' sec_cov w   subject to   0 <= w <= 0.1
# Entries of `df` are wall-clock milliseconds for one solve.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])

number = 1  # repetitions handed to timeit for every measurement

# Loop-invariant list of risk-factor columns.
all_styles = risk_styles + industry_styles + ['COUNTRY']

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values

    # Dense security covariance (factor part + specific part, both scaled by
    # 1/10000); used only to evaluate the two solutions after solving.
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000

    er = factor_data[factor].values
    n = len(er)
    bm = np.zeros(n)
    lbound = np.zeros(n)
    ubound = np.ones(n) * 0.1

    # Factor-structured risk model handed to alphamind (cov=None).
    risk_model = dict(cov=None,
                      factor_cov=risk_cov / 10000.,
                      factor_loading=risk_exposure,
                      idsync=(special_risk ** 2) / 10000.)

    # alphamind: solve once to keep the weights, then time an identical call.
    # `lam` is left at mean_variance_builder's default.
    # NOTE(review): the cvxpy objective below uses 0.5 * risk, so the default
    # is presumably lam=1 -- confirm against alphamind.
    # The timed statement is a string evaluated against globals(), so the
    # names inside it must match the loop variables above.
    status, y, x1 = mean_variance_builder(er,
                                          risk_model,
                                          bm,
                                          lbound,
                                          ubound,
                                          None,
                                          None)
    elapsed_alphamind = timeit.timeit("""mean_variance_builder(er,
                                                               risk_model,
                                                               bm,
                                                               lbound,
                                                               ubound,
                                                               None,
                                                               None)""",
                                      number=number, globals=globals()) / number * 1000

    # cvxpy: the same objective in factor form. Atoms are referenced through
    # the `cvxpy` module so this cell does not rely on names imported by
    # another cell. `@` is used for every matrix product; `*` between cvxpy
    # expressions is deprecated for matrix multiplication.
    w = cvxpy.Variable(n)
    specific_var = cvxpy.sum_squares(cvxpy.multiply(special_risk / 100., w))
    factor_var = cvxpy.quad_form(risk_exposure.T @ w, risk_cov / 10000.)
    risk = specific_var + factor_var
    objective = cvxpy.Minimize(-w.T @ er + 0.5 * risk)
    constraints = [w >= lbound,
                   w <= ubound]
    prob = cvxpy.Problem(objective, constraints)

    # Solve once before timing, so the timed call is a repeat solve of the
    # same Problem object.
    prob.solve(solver='ECOS')
    elapsed_cvxpy = timeit.timeit("prob.solve(solver='ECOS')",
                                  number=number, globals=globals()) / number * 1000

    # Compare the utilities of both solutions under the dense covariance.
    u1 = -x1 @ er + 0.5 * x1 @ sec_cov @ x1
    x2 = np.array(w.value).flatten()
    u2 = -x2 @ er + 0.5 * x2 @ sec_cov @ x2
    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elapsed_alphamind
    df.loc['cvxpy', u_name] = elapsed_cvxpy
    alpha_logger.info(f"{u_name} is finished")
In [11]:
# Display the timing table (milliseconds) filled in by the cell above.
df
Out[11]:
In [12]:
# Benchmark 5 -- mean-variance with box bounds and a budget constraint:
#     minimise  -er' w + 0.5 * w' sec_cov w
#     subject to   0 <= w <= 0.1,   ones' w == 1
# Entries of `df` are wall-clock milliseconds for one solve.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])

number = 1  # repetitions handed to timeit for every measurement

# Loop-invariant list of risk-factor columns.
all_styles = risk_styles + industry_styles + ['COUNTRY']

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values

    # Dense security covariance (factor part + specific part, both scaled by
    # 1/10000); used only to evaluate the two solutions after solving.
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000

    er = factor_data[factor].values
    n = len(er)
    bm = np.zeros(n)
    lbound = np.zeros(n)
    ubound = np.ones(n) * 0.1

    # A single all-ones constraint column whose two targets are both 1.
    # NOTE(review): presumably mean_variance_builder reads risk_target as
    # (lower, upper) bounds -- confirm against alphamind.
    risk_constraints = np.ones((len(er), 1))
    risk_target = (np.array([1.]), np.array([1.]))

    # Factor-structured risk model handed to alphamind (cov=None).
    risk_model = dict(cov=None,
                      factor_cov=risk_cov / 10000.,
                      factor_loading=risk_exposure,
                      idsync=(special_risk ** 2) / 10000.)

    # alphamind: solve once to keep the weights, then time an identical call.
    # The timed statement is a string evaluated against globals(), so the
    # names inside it must match the loop variables above.
    status, y, x1 = mean_variance_builder(er,
                                          risk_model,
                                          bm,
                                          lbound,
                                          ubound,
                                          risk_constraints,
                                          risk_target)
    elapsed_alphamind = timeit.timeit("""mean_variance_builder(er,
                                                               risk_model,
                                                               bm,
                                                               lbound,
                                                               ubound,
                                                               risk_constraints,
                                                               risk_target)""",
                                      number=number, globals=globals()) / number * 1000

    # cvxpy: the same programme in factor form. Atoms are referenced through
    # the `cvxpy` module so this cell does not rely on names imported by
    # another cell. `@` is used for every matrix product; `*` between cvxpy
    # expressions is deprecated for matrix multiplication.
    w = cvxpy.Variable(n)
    specific_var = cvxpy.sum_squares(cvxpy.multiply(special_risk / 100., w))
    factor_var = cvxpy.quad_form(risk_exposure.T @ w, risk_cov / 10000.)
    risk = specific_var + factor_var
    objective = cvxpy.Minimize(-w.T @ er + 0.5 * risk)
    curr_risk_exposure = risk_constraints.T @ w
    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0]]
    prob = cvxpy.Problem(objective, constraints)

    # Solve once before timing, so the timed call is a repeat solve of the
    # same Problem object.
    prob.solve(solver='ECOS')
    elapsed_cvxpy = timeit.timeit("prob.solve(solver='ECOS')",
                                  number=number, globals=globals()) / number * 1000

    # Compare the utilities of both solutions under the dense covariance.
    u1 = -x1 @ er + 0.5 * x1 @ sec_cov @ x1
    x2 = np.array(w.value).flatten()
    u2 = -x2 @ er + 0.5 * x2 @ sec_cov @ x2
    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elapsed_alphamind
    df.loc['cvxpy', u_name] = elapsed_cvxpy
    alpha_logger.info(f"{u_name} is finished")
In [13]:
# Display the timing table (milliseconds) filled in by the cell above.
df
Out[13]:
In [14]:
# Benchmark 6 -- return maximisation under a risk cap. The cvxpy formulation
# solved below is:
#     minimise  -er' w
#     subject to   0 <= w <= 0.1,   ones' w == sum(bm),
#                  w' sec_cov w <= target_vol ** 2
# Entries of `df` are wall-clock milliseconds for one solve.
df = pd.DataFrame(columns=u_names, index=['cvxpy', 'alphamind'])

number = 1  # repetitions handed to timeit for every measurement
target_vol = 0.5

# Loop-invariant list of risk-factor columns.
all_styles = risk_styles + industry_styles + ['COUNTRY']

for u_name, sample_data in zip(u_names, data_set):
    factor_data = sample_data['factor']
    risk_cov = sample_data['risk_cov'][all_styles].values
    risk_exposure = factor_data[all_styles].values
    special_risk = factor_data.srisk.values

    # Dense security covariance (factor part + specific part, both scaled by
    # 1/10000). It is not used further in this cell; kept so the variable
    # matches the previous benchmarks.
    sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 + np.diag(special_risk ** 2) / 10000

    er = factor_data[factor].values
    n = len(er)

    # Benchmark weights: the 'weight' column when the data has one, otherwise
    # equal weights.
    if 'weight' in factor_data:
        bm = factor_data.weight.values
    else:
        bm = np.ones_like(er) / n

    lbound = np.zeros(n)
    ubound = np.ones(n) * 0.1

    # A single all-ones constraint column; both targets equal the total
    # benchmark weight.
    risk_constraints = np.ones((n, 1))
    risk_target = (np.array([bm.sum()]), np.array([bm.sum()]))

    # Factor-structured risk model handed to alphamind (cov=None).
    risk_model = dict(cov=None,
                      factor_cov=risk_cov / 10000.,
                      factor_loading=risk_exposure,
                      idsync=(special_risk ** 2) / 10000.)

    # alphamind: solve once to keep the weights, then time an identical call.
    # The timed statement is a string evaluated against globals(), so the
    # names inside it must match the loop variables above.
    status, y, x1 = target_vol_builder(er,
                                       risk_model,
                                       bm,
                                       lbound,
                                       ubound,
                                       risk_constraints,
                                       risk_target,
                                       vol_target=target_vol)
    elapsed_alphamind = timeit.timeit("""target_vol_builder(er,
                                                            risk_model,
                                                            bm,
                                                            lbound,
                                                            ubound,
                                                            risk_constraints,
                                                            risk_target,
                                                            vol_target=target_vol)""",
                                      number=number, globals=globals()) / number * 1000

    # cvxpy: variance expressed in factor form and used as a constraint.
    # Atoms are referenced through the `cvxpy` module so this cell does not
    # rely on names imported by another cell. `@` is used for every matrix
    # product; `*` between cvxpy expressions is deprecated for matrix
    # multiplication.
    w = cvxpy.Variable(n)
    specific_var = cvxpy.sum_squares(cvxpy.multiply(special_risk / 100., w))
    factor_var = cvxpy.quad_form(risk_exposure.T @ w, risk_cov / 10000.)
    risk = specific_var + factor_var
    objective = cvxpy.Minimize(-w.T @ er)
    curr_risk_exposure = risk_constraints.T @ w
    constraints = [w >= lbound,
                   w <= ubound,
                   curr_risk_exposure == risk_target[0],
                   risk <= target_vol * target_vol]
    prob = cvxpy.Problem(objective, constraints)

    # Solve once before timing, so the timed call is a repeat solve of the
    # same Problem object.
    prob.solve(solver='ECOS')
    elapsed_cvxpy = timeit.timeit("prob.solve(solver='ECOS')",
                                  number=number, globals=globals()) / number * 1000

    # Both solvers must reach the same expected return (4 decimals).
    u1 = -x1 @ er
    x2 = np.array(w.value).flatten()
    u2 = -x2 @ er
    np.testing.assert_array_almost_equal(u1, u2, 4)

    df.loc['alphamind', u_name] = elapsed_alphamind
    df.loc['cvxpy', u_name] = elapsed_cvxpy
    alpha_logger.info(f"{u_name} is finished")
In [15]:
# Display the timing table (milliseconds) filled in by the cell above.
df
Out[15]:
In [ ]: